This document will allow you to reproduce all supplementary figures.
Set working directory and load all necessary libraries and functions.
# If not installed, install and load the package "here", else: only load the package.
err <- try(library("here", character.only = TRUE), silent = TRUE)
## here() starts at /Users/lgoeminn/Documents/phD/MSqRobHurdlePaper
if (class(err) == 'try-error') {
install.packages("here", repos = "https://cloud.r-project.org")
library("here", character.only = TRUE)
}
wd <- here()
# Optional: change the working directory
setwd(wd)
# Load all functions and libraries
source(paste0(wd, "/R/functions.r"))
## Loading required namespace: BiocManager
##
## Attaching package: 'devtools'
## The following objects are masked from 'package:remotes':
##
## dev_package_deps, github_pull, github_release, install_bioc,
## install_bitbucket, install_cran, install_deps, install_git,
## install_github, install_local, install_svn, install_url,
## install_version, package_deps, update_packages
## ── Attaching packages ───────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.0.0 ✔ purrr 0.2.5
## ✔ tibble 1.4.2 ✔ dplyr 0.7.5
## ✔ tidyr 0.8.1 ✔ stringr 1.3.1
## ✔ readr 1.1.1 ✔ forcats 0.3.0
## ── Conflicts ──────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind,
## colMeans, colnames, colSums, dirname, do.call, duplicated,
## eval, evalq, Filter, Find, get, grep, grepl, intersect,
## is.unsorted, lapply, lengths, Map, mapply, match, mget, order,
## paste, pmax, pmax.int, pmin, pmin.int, Position, rank, rbind,
## Reduce, rowMeans, rownames, rowSums, sapply, setdiff, sort,
## table, tapply, union, unique, unsplit, which, which.max,
## which.min
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'Biobase'
## The following object is masked from 'package:dplyr':
##
## exprs
## Loading required package: mzR
## Loading required package: Rcpp
## Warning in fun(libname, pkgname): mzR has been built against a different Rcpp version (0.12.16)
## than is installed on your system (0.12.17). This might lead to errors
## when loading mzR. If you encounter such issues, please send a report,
## including the output of sessionInfo() to the Bioc support forum at
## https://support.bioconductor.org/. For details see also
## https://github.com/sneumann/mzR/wiki/mzR-Rcpp-compiler-linker-issue.
## Loading required package: BiocParallel
## Loading required package: ProtGenerics
##
## This is MSnbase version 2.6.1
## Visit https://lgatto.github.io/MSnbase/ to get started.
##
## Attaching package: 'MSnbase'
## The following object is masked from 'package:stats':
##
## smooth
## The following object is masked from 'package:base':
##
## trimws
##
## Attaching package: 'limma'
## The following object is masked from 'package:BiocGenerics':
##
## plotMA
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following object is masked from 'package:tidyr':
##
## expand
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: future
## Loading required package: SummarizedExperiment
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:future':
##
## values
## The following object is masked from 'package:Matrix':
##
## expand
## The following objects are masked from 'package:dplyr':
##
## first, rename
## The following object is masked from 'package:tidyr':
##
## expand
## The following object is masked from 'package:base':
##
## expand.grid
## Loading required package: IRanges
##
## Attaching package: 'IRanges'
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
## The following object is masked from 'package:purrr':
##
## reduce
## Loading required package: GenomeInfoDb
## Loading required package: DelayedArray
## Loading required package: matrixStats
##
## Attaching package: 'matrixStats'
## The following objects are masked from 'package:Biobase':
##
## anyMissing, rowMedians
## The following object is masked from 'package:dplyr':
##
## count
##
## Attaching package: 'DelayedArray'
## The following objects are masked from 'package:matrixStats':
##
## colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges
## The following object is masked from 'package:purrr':
##
## simplify
## The following objects are masked from 'package:base':
##
## aperm, apply
##
## Attaching package: 'stageR'
## The following object is masked from 'package:methods':
##
## getMethod
source(paste0(wd,"/R/plot_functions.R"))
Give session info for reproducibility.
sessionInfo()
## R version 3.5.0 (2018-04-23)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS 10.14.1
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
##
## locale:
## [1] nl_BE.UTF-8/nl_BE.UTF-8/nl_BE.UTF-8/C/nl_BE.UTF-8/nl_BE.UTF-8
##
## attached base packages:
## [1] stats4 parallel stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] MSqRob_0.7.5 stageR_1.3.29
## [3] SummarizedExperiment_1.10.1 DelayedArray_0.6.0
## [5] matrixStats_0.53.1 GenomicRanges_1.32.3
## [7] GenomeInfoDb_1.16.0 IRanges_2.14.10
## [9] S4Vectors_0.18.3 furrr_0.1.0.9002
## [11] future_1.10.0 readxl_1.1.0
## [13] colorspace_1.3-2 zoo_1.8-2
## [15] lme4_1.1-17 Matrix_1.2-14
## [17] limma_3.36.1 MSnbase_2.6.1
## [19] ProtGenerics_1.12.0 BiocParallel_1.14.1
## [21] mzR_2.14.0 Rcpp_0.12.17
## [23] Biobase_2.40.0 BiocGenerics_0.26.0
## [25] forcats_0.3.0 stringr_1.3.1
## [27] dplyr_0.7.5 purrr_0.2.5
## [29] readr_1.1.1 tidyr_0.8.1
## [31] tibble_1.4.2 ggplot2_3.0.0
## [33] tidyverse_1.2.1 MSstats_3.12.2
## [35] devtools_1.13.5 remotes_2.0.2
## [37] here_0.1
##
## loaded via a namespace (and not attached):
## [1] minqa_1.2.4 rprojroot_1.3-2 XVector_0.20.0
## [4] rstudioapi_0.7 listenv_0.7.0 affyio_1.50.0
## [7] ggrepel_0.8.0 lubridate_1.7.4 xml2_1.2.0
## [10] codetools_0.2-15 splines_3.5.0 mnormt_1.5-5
## [13] doParallel_1.0.11 impute_1.54.0 knitr_1.20
## [16] jsonlite_1.5 nloptr_1.0.4 broom_0.4.4
## [19] vsn_3.48.1 BiocManager_1.30.4 compiler_3.5.0
## [22] httr_1.3.1 backports_1.1.2 assertthat_0.2.0
## [25] lazyeval_0.2.1 cli_1.0.0 htmltools_0.3.6
## [28] tools_3.5.0 bindrcpp_0.2.2 GenomeInfoDbData_1.1.0
## [31] gtable_0.2.0 glue_1.2.0 affy_1.58.0
## [34] reshape2_1.4.3 MALDIquant_1.17 cellranger_1.1.0
## [37] gdata_2.18.0 preprocessCore_1.42.0 nlme_3.1-137
## [40] iterators_1.0.9 psych_1.8.4 globals_0.12.4
## [43] rvest_0.3.2 gtools_3.5.0 XML_3.98-1.11
## [46] MASS_7.3-50 zlibbioc_1.26.0 scales_0.5.0
## [49] BiocInstaller_1.30.0 pcaMethods_1.72.0 hms_0.4.2
## [52] doSNOW_1.0.16 yaml_2.1.19 memoise_1.1.0
## [55] stringi_1.2.3 foreach_1.4.4 randomForest_4.6-14
## [58] caTools_1.17.1 rlang_0.3.0.1 pkgconfig_2.0.1
## [61] bitops_1.0-6 mzID_1.18.0 evaluate_0.10.1
## [64] lattice_0.20-35 bindr_0.1.1 tidyselect_0.2.4
## [67] plyr_1.8.4 magrittr_1.5 R6_2.2.2
## [70] snow_0.4-2 gplots_3.0.1 pillar_1.2.3
## [73] haven_1.1.1 foreign_0.8-70 withr_2.1.2
## [76] RCurl_1.95-4.10 survival_2.42-3 modelr_0.1.2
## [79] crayon_1.3.4 KernSmooth_2.23-15 rmarkdown_1.10
## [82] grid_3.5.0 minpack.lm_1.2-1 data.table_1.11.4
## [85] marray_1.58.0 digest_0.6.18 munsell_0.5.0
Important: make sure you have run the CPTAC and HEART analyses, or load the necessary files as follows:
load(paste0(wd, "/save_files_CPTAC/peptides_CPTAC2.RData"))
load(paste0(wd, "/save_files_CPTAC/peptides_CPTAC_KNN1.RData"))
load(paste0(wd, "/save_files_CPTAC/peptides_CPTAC_Perseus_imp.RData"))
load(paste0(wd, "/save_files_CPTAC/res_CPTAC_full.RData"))
load(paste0(wd, "/save_files_CPTAC/res_CPTAC_co_full.RData"))
load(paste0(wd, "/save_files_CPTAC/res_CPTAC_Hurdle.RData"))
load(paste0(wd, "/save_files_CPTAC/res_CPTAC_PI_full.RData"))
load(paste0(wd, "/save_files_CPTAC/res_CPTAC_KNN1_full.RData"))
load(paste0(wd, "/save_files_CPTAC/res_CPTAC_MSstats_full.RData"))
load(paste0(wd, "/save_files_CPTAC/CPTAC_Perseus_full.RData"))
load(paste0(wd, "/save_files_CPTAC/CPTAC_Perseus_imp_full.RData"))
load(paste0(wd, "/save_files_CPTAC/res_CPTAC_ProPCA_full.RData"))
load(paste0(wd, "/save_files_CPTAC/testResultOneComparison.RData"))
load(paste0(wd, "/save_files_PXD006675/peptides_HEART2.RData"))
load(paste0(wd, "/save_files_PXD006675/res_HEART_Hurdle.RData"))
load(paste0(wd, "/save_files_PXD006675/overlap_PHM.RData"))
We here show the sample-wise effect of kNN and Perseus imputation at the level of MaxQuant’s LFQ-values (protein level). Blue are the original values, red are the imputed values.
# Import non-imputed data
file.proteinGroups <- paste0(wd,"/datasets/CPTAC/proteinGroups_preprocessed.txt")
exprs_col <- grepEcols(file.proteinGroups, "LFQ.intensity.", split = "\t")
proteinGroups.CPTAC <- readMSnSet2(file.proteinGroups, ecol = exprs_col, fnames = c("Protein.IDs"), sep = "\t")
sampleNames(proteinGroups.CPTAC) <- str_replace(sampleNames(proteinGroups.CPTAC), "LFQ.intensity.", "") %>% make.names
# Import Perseus-imputed data
file.proteinGroups <- paste0(wd,"/datasets/CPTAC/proteinGroups_preprocessed_imputed.txt")
exprs_col <- grepEcols(file.proteinGroups, "LFQ.intensity.", split = "\t")
proteinGroups.CPTAC.Perseus.imp <- readMSnSet2(file.proteinGroups, ecol = exprs_col, fnames = c("Protein.IDs"), sep = "\t")
sampleNames(proteinGroups.CPTAC.Perseus.imp) <- str_replace(sampleNames(proteinGroups.CPTAC.Perseus.imp), "LFQ.intensity.", "") %>% make.names
sample.names <- paste0(sampleNames(proteinGroups.CPTAC) %>% substr(3, 3), sampleNames(proteinGroups.CPTAC) %>% substr(5, 5))
### Impute with KNN-1
proteinGroups.CPTAC.KNN1 <- impute(proteinGroups.CPTAC, method = "knn")
## Warning in knnimp(x, k, maxmiss = rowmax, maxp = maxp): 700 rows with more than 50 % entries missing;
## mean imputation used for these rows
cex.main <- 1.2
cex.lab <- 1
cex <- 1
par(mfrow=c(1,1))
# 1. Histograms for the effect of Perseus imputation LFQ-level (Supplementary Fig. 1)
# grDevices::svg(paste0(wd,"/Perseus_protein_sample_imp.svg"), width=60, height=90)
# cex.main <- 10
# cex.lab <- 8
# cex <- 6
# par(mfrow=c(1,3))
# par(mar=c(5.1*cex/2,4.1*cex/2,4.1*cex/2,2.1*cex/2))
# par(mfrow=c(9,3))
for(i in 1:ncol(exprs(proteinGroups.CPTAC.Perseus.imp))){
p1 <- hist(exprs(proteinGroups.CPTAC.Perseus.imp[,i]), breaks=25, plot = FALSE)
p2 <- hist(exprs(proteinGroups.CPTAC[,i]), breaks=25, plot = FALSE)
plot(p1, col=rgb(255,0,0, maxColorValue = 255), xlim=c(12,28), border=rgb(255,165,0, maxColorValue = 255), las = 1, main = paste0("Perseus imputation sample ", sample.names[i]), xlab = "log2(LFQ intensity)", cex.lab=cex.lab, cex.axis=cex, cex.main=cex.main, cex.sub=cex, lwd=cex) # first histogram
plot(p2, col=rgb(0,0,255, maxColorValue = 255), xlim=c(12,28), border=rgb(0,165,255, maxColorValue = 255), add=TRUE) # second histogram
}
# dev.off()
# 2. Histograms for the effect of kNN imputation at LFQ-level (Supplementary Fig. 2)
# grDevices::svg(paste0(wd,"/kNN_protein_sample_imp.svg"), width=60, height=90)
# cex.main <- 10
# cex.lab <- 8
# cex <- 6
# par(mfrow=c(1,3))
# par(mar=c(5.1*cex/2,4.1*cex/2,4.1*cex/2,2.1*cex/2))
# par(mfrow=c(9,3))
for(i in 1:ncol(exprs(proteinGroups.CPTAC.KNN1))){
p1 <- hist(exprs(proteinGroups.CPTAC.KNN1)[,i], breaks=25, plot = FALSE)
p2 <- hist(exprs(proteinGroups.CPTAC)[,i], breaks=25, plot = FALSE)
plot(p1, col=rgb(255,0,0, maxColorValue = 255), xlim=c(13,28), border=rgb(255,165,0, maxColorValue = 255), las = 1, main = paste0("kNN imputation sample ", sample.names[i]), xlab = "log2(LFQ intensity)", cex.lab=cex.lab, cex.axis=cex, cex.main=cex.main, cex.sub=cex, lwd=cex) # first histogram
plot(p2, col=rgb(0,0,255, maxColorValue = 255), xlim=c(13,28), border=rgb(0,165,255, maxColorValue = 255), add=TRUE) # second histogram
}
# dev.off()
We here show the sample-wise effect of kNN and Perseus imputation at the peptide level. Blue are the original values, red are the imputed values.
# 3. Histograms for the effect of Perseus imputation at peptide-level (Supplementary Fig. 3)
# grDevices::svg(paste0(wd,"/Perseus_sample_imp.svg"), width=60, height=90)
# cex.main <- 10
# cex.lab <- 8
# cex <- 6
# par(mfrow=c(1,3))
# par(mar=c(5.1*cex/2,4.1*cex/2,4.1*cex/2,2.1*cex/2))
# par(mfrow=c(9,3))
for(i in 1:ncol(exprs(peptides.CPTAC.Perseus.imp))){
p1 <- hist(exprs(peptides.CPTAC.Perseus.imp[,i]), breaks=25, plot = FALSE)
p2 <- hist(exprs(peptides.CPTAC2[,i]), breaks=25, plot = FALSE)
plot(p1, col=rgb(255,0,0, maxColorValue = 255), xlim=c(12,28), border=rgb(255,165,0, maxColorValue = 255), las = 1, main = paste0("Perseus imputation sample ", sample.names[i]), xlab = "log2(peptide intensity)", cex.lab=cex.lab, cex.axis=cex, cex.main=cex.main, cex.sub=cex, lwd=cex) # first histogram
plot(p2, col=rgb(0,0,255, maxColorValue = 255), xlim=c(12,28), border=rgb(0,165,255, maxColorValue = 255), add=TRUE) # second histogram
}
# dev.off()
# 4. Histograms for the effect of kNN imputation at peptide-level (Supplementary Fig. 4)
sample.names <- paste0(sampleNames(peptides.CPTAC.KNN1) %>% substr(3, 3), sampleNames(peptides.CPTAC.KNN1) %>% substr(5, 5))
cex.main <- 1.2
cex.lab <- 1
cex <- 1
par(mfrow=c(1,1))
# grDevices::svg(paste0(wd,"/kNN_sample_imp.svg"), width=60, height=90)
# cex.main <- 10
# cex.lab <- 8
# cex <- 6
# par(mfrow=c(1,3))
# par(mar=c(5.1*cex/2,4.1*cex/2,4.1*cex/2,2.1*cex/2))
# par(mfrow=c(9,3))
for(i in 1:ncol(exprs(peptides.CPTAC.KNN1))){
p1 <- hist(exprs(peptides.CPTAC.KNN1)[,i], breaks=25, plot = FALSE)
p2 <- hist(exprs(peptides.CPTAC2)[,i], breaks=25, plot = FALSE)
plot(p1, col=rgb(255,0,0, maxColorValue = 255), xlim=c(13,28), border=rgb(255,165,0, maxColorValue = 255), las = 1, main = paste0("kNN imputation sample ", sample.names[i]), xlab = "log2(peptide intensity)", cex.lab=cex.lab, cex.axis=cex, cex.main=cex.main, cex.sub=cex, lwd=cex) # first histogram
plot(p2, col=rgb(0,0,255, maxColorValue = 255), xlim=c(13,28), border=rgb(0,165,255, maxColorValue = 255), add=TRUE) # second histogram
}
# dev.off()
# FDR-FTP plot including MSstats, MSstats with Inf and kNN, based on our preprocessing
res.list <- list(res.CPTAC.full,
res.CPTAC.co.full,
res.CPTAC.Hurdle,
res.CPTAC.KNN1.full,
res.CPTAC.PI.full,
CPTAC.Perseus.full,
CPTAC.Perseus.imp.full,
res.CPTAC.MSstats.full,
res.CPTAC.MSstats.full[!is.infinite(res.CPTAC.MSstats.full$log2FC),],
res.CPTAC.ProPCA.full
)
contrast.vec <- c("condition6B-condition6A",
"condition6C-condition6B",
"condition6C-condition6A")
names(contrast.vec) <- c("Condition 6B vs. 6A",
"Condition 6C vs. 6B",
"Condition 6C vs. 6A")
colors <- c("#FF681E", "#E15E9E", "#5A2A82", "blue", "green", "#1B2944", "#42B7BA", "yellow3", "peru", "red2") # darkblue: "#1B2944" gray: "#50FF00"
sort.list <- list(c("qvalue", "pvalue", "t", "logFC"),
c("qvalue", "pvalue", "t", "logOR"),
c("qchisq", "pchisq", "chisq", NA),
c("qvalue", "pvalue", "t", "logFC"),
c("qvalue", "pvalue", "t", "logFC"),
c("q.value", "p.value", "Test.statistic", "Difference"),
c("q.value", "p.value", "Test.statistic", "Difference"),
c("adj.pvalue", "pvalue", "Tvalue", "log2FC"),
c("adj.pvalue", "pvalue", "Tvalue", "log2FC"),
c("adj.P.Val", "P.Value", "t", "logFC"))
PlotFDPTPR(res.list, contrast.vec, colors, sort.list, TPcol = c("UPS"), plotSVG = FALSE) #TRUE
# Since in our preprocessing, 2 significant proteins were removed from the MSstats output, we also redo the whole thing with MSstats preprocessing as a reference.
res.CPTAC.MSstats <- as.tibble(testResultOneComparison$ComparisonResult)
res.CPTAC.MSstats <- res.CPTAC.MSstats %>% dplyr::rename(protein = Protein, contrast = Label)
res.CPTAC.MSstats$contrast <- res.CPTAC.MSstats$contrast %>% as.character
res.CPTAC.MSstats$contrast[res.CPTAC.MSstats$contrast == "6B-6A"] <- "condition6B-condition6A"
res.CPTAC.MSstats$contrast[res.CPTAC.MSstats$contrast == "6C-6A"] <- "condition6C-condition6A"
res.CPTAC.MSstats$contrast[res.CPTAC.MSstats$contrast == "6C-6B"] <- "condition6C-condition6B"
res.CPTAC.Base2 <- res.CPTAC.MSstats %>% select(protein, contrast)
res.CPTAC.Base2$UPS <- grepl("ups", res.CPTAC.Base2$protein)
res.CPTAC.MSstats.full2 <- res.CPTAC.Base2 %>% left_join(res.CPTAC.MSstats)
## Joining, by = c("protein", "contrast")
res.CPTAC.MSstats.full2 <- res.CPTAC.MSstats.full2 %>% arrange(adj.pvalue,pvalue)
res.CPTAC.MSstats.noInf.full2 <- res.CPTAC.MSstats.full2
res.CPTAC.MSstats.noInf.full2[is.infinite(res.CPTAC.MSstats.noInf.full2$log2FC),][,c("log2FC", "SE", "Tvalue", "DF", "pvalue", "adj.pvalue")] <- NA
res.CPTAC.MSstats.noInf.full2 <- res.CPTAC.MSstats.noInf.full2 %>% arrange(adj.pvalue,pvalue)
# Perseus without imputation #
# 1. Import Perseus results
results.Perseus <- read.table(paste0(wd,"/datasets/CPTAC/t-tests_Perseus_1_6_0_7-6A-6B-6C.txt"), sep="\t", header=TRUE, quote = "", comment.char = "")
results.Perseus <- results.Perseus[-c(1,2),] %>% map_dfr(~{.x %>% as.character %>% type.convert})
# 2. Convert wide to long
results.Perseus <- results.Perseus[,43:58]
colnames(results.Perseus) <- gsub("Student.s.T.test.","",colnames(results.Perseus))
res.Perseus <- results.Perseus %>% reshape(timevar = "contrast", varying =,1:12, direction = "long", sep = ".6")
## Warning: Setting row names on a tibble is deprecated.
## Warning: Setting row names on a tibble is deprecated.
## Warning: Setting row names on a tibble is deprecated.
res.Perseus$contrast[res.Perseus$contrast == "B_6A"] <- "condition6B-condition6A"
res.Perseus$contrast[res.Perseus$contrast == "C_6A"] <- "condition6C-condition6A"
res.Perseus$contrast[res.Perseus$contrast == "C_6B"] <- "condition6C-condition6B"
res.Perseus <- res.Perseus %>% dplyr::rename(protein = Majority.protein.IDs, protein.name = Protein.names, gene.name = Gene.names) %>% select(-Protein.IDs)
UPS.Perseus <- unique(res.Perseus$protein)[grepl("ups", unique(res.Perseus$protein))]
UPS.MSstats <- unique(res.CPTAC.MSstats$protein)[grepl("ups", unique(res.CPTAC.MSstats$protein))]
UPS.Hurdle <- unique(res.CPTAC.Hurdle$protein)[grepl("ups", unique(res.CPTAC.Hurdle$protein))]
UPS.MSstats[!(UPS.MSstats %in% UPS.Perseus)]
## [1] P01008ups;CON__P41361 P68871ups;CON__P02070;CON__Q3SX09
## [3] P99999ups;CON__P62894
## 1446 Levels: A5Z2X5 D6VTK4 O00762ups O13516 O13547 O13563 O14455 ... Q99385
# P01008ups;CON__P41361 P68871ups;CON__P02070;CON__Q3SX09 P99999ups;CON__P62894
UPS.Perseus[!(UPS.Perseus %in% UPS.MSstats)]
## [1] P01008ups P68871ups P99999ups
## 1462 Levels: A5Z2X5 D6VTK4 O00762ups O13516 O13547 O13563 O14455 ... Q99385
# P01008ups P68871ups P99999ups
# -> all three of them link perfectly with each other, thus we will fill them in
CPTAC.Perseus.full2 <- res.CPTAC.Base2 %>% left_join(res.Perseus)
## Joining, by = c("protein", "contrast")
## Warning: Column `protein` joining factors with different levels, coercing
## to character vector
CPTAC.Perseus.full2[CPTAC.Perseus.full2$protein == "P01008ups;CON__P41361",][,c("contrast", "p.value", "q.value", "Difference", "Test.statistic")] <- res.Perseus[res.Perseus$protein == "P01008ups",][,c("contrast", "p.value", "q.value", "Difference", "Test.statistic")]
CPTAC.Perseus.full2[CPTAC.Perseus.full2$protein == "P68871ups;CON__P02070;CON__Q3SX09",][,c("contrast", "p.value", "q.value", "Difference", "Test.statistic")] <- res.Perseus[res.Perseus$protein == "P68871ups",][,c("contrast", "p.value", "q.value", "Difference", "Test.statistic")]
CPTAC.Perseus.full2[CPTAC.Perseus.full2$protein == "P99999ups;CON__P62894",][,c("contrast", "p.value", "q.value", "Difference", "Test.statistic")] <- res.Perseus[res.Perseus$protein == "P99999ups",][,c("contrast", "p.value", "q.value", "Difference", "Test.statistic")]
CPTAC.Perseus.full2 <- CPTAC.Perseus.full2 %>% arrange(p.value)
# Perseus with imputation #
results.Perseus.imp <- read.table(paste0(wd,"/datasets/CPTAC/t-tests_Perseus_imputed_1_6_0_7-6A-6B-6C.txt"), sep="\t", header=TRUE, quote = "", comment.char = "")
results.Perseus.imp <- results.Perseus.imp[-c(1,2),] %>% map_dfr(~{.x %>% as.character %>% type.convert})
# 2. Convert wide to long
results.Perseus.imp <- results.Perseus.imp[,43:58]
colnames(results.Perseus.imp) <- gsub("Student.s.T.test.","",colnames(results.Perseus.imp))
res.Perseus.imp <- results.Perseus.imp %>% reshape(timevar = "contrast", varying =,1:12, direction = "long", sep = ".6")
## Warning: Setting row names on a tibble is deprecated.
## Warning: Setting row names on a tibble is deprecated.
## Warning: Setting row names on a tibble is deprecated.
res.Perseus.imp$contrast[res.Perseus.imp$contrast == "B_6A"] <- "condition6B-condition6A"
res.Perseus.imp$contrast[res.Perseus.imp$contrast == "C_6A"] <- "condition6C-condition6A"
res.Perseus.imp$contrast[res.Perseus.imp$contrast == "C_6B"] <- "condition6C-condition6B"
res.Perseus.imp <- res.Perseus.imp %>% dplyr::rename(protein = Majority.protein.IDs, protein.name = Protein.names, gene.name = Gene.names) %>% select(-Protein.IDs)
CPTAC.Perseus.imp.full2 <- res.CPTAC.Base2 %>% left_join(res.Perseus.imp)
## Joining, by = c("protein", "contrast")
## Warning: Column `protein` joining factors with different levels, coercing
## to character vector
CPTAC.Perseus.imp.full2[CPTAC.Perseus.imp.full2$protein == "P01008ups;CON__P41361",][,c("contrast", "p.value", "q.value", "Difference", "Test.statistic")] <- res.Perseus.imp[res.Perseus.imp$protein == "P01008ups",][,c("contrast", "p.value", "q.value", "Difference", "Test.statistic")]
CPTAC.Perseus.imp.full2[CPTAC.Perseus.imp.full2$protein == "P68871ups;CON__P02070;CON__Q3SX09",][,c("contrast", "p.value", "q.value", "Difference", "Test.statistic")] <- res.Perseus.imp[res.Perseus.imp$protein == "P68871ups",][,c("contrast", "p.value", "q.value", "Difference", "Test.statistic")]
CPTAC.Perseus.imp.full2[CPTAC.Perseus.imp.full2$protein == "P99999ups;CON__P62894",][,c("contrast", "p.value", "q.value", "Difference", "Test.statistic")] <- res.Perseus.imp[res.Perseus.imp$protein == "P99999ups",][,c("contrast", "p.value", "q.value", "Difference", "Test.statistic")]
CPTAC.Perseus.imp.full2 <- CPTAC.Perseus.imp.full2 %>% arrange(p.value)
# Hurdle model #
UPS.MSstats[!(UPS.MSstats %in% UPS.Hurdle)]
## [1] P01008ups;CON__P41361 P01112ups
## [3] P09211ups P62988ups
## [5] P68871ups;CON__P02070;CON__Q3SX09 P99999ups;CON__P62894
## 1446 Levels: A5Z2X5 D6VTK4 O00762ups O13516 O13547 O13563 O14455 ... Q99385
# P01008ups;CON__P41361 P01112ups P09211ups P62988ups P68871ups;CON__P02070;CON__Q3SX09 P99999ups;CON__P62894
UPS.Hurdle[!(UPS.Hurdle %in% UPS.MSstats)]
## [1] P99999ups P01008ups P68871ups
## [4] P05759;P0CG63;P62988ups
## 1655 Levels: A5Z2X5 ... Q99385
# P99999ups P01008ups P68871ups P05759;P0CG63;P62988ups
# -> everything except P01112ups and P09211ups link up
# => insert the estimates of the 4 others
res.CPTAC.Hurdle2 <- res.CPTAC.Base2 %>% left_join(res.CPTAC.Hurdle)
## Joining, by = c("protein", "contrast", "UPS")
## Warning: Column `protein` joining factors with different levels, coercing
## to character vector
res.CPTAC.Hurdle2[res.CPTAC.Hurdle2$protein == "P99999ups;CON__P62894",][,c("contrast", "gene.name", "protein.name", "logFC", "logOR", "chisq", "pchisq", "qchisq")] <- res.CPTAC.Hurdle[res.CPTAC.Hurdle$protein == "P99999ups",][,c("contrast", "gene.name", "protein.name", "logFC", "logOR", "chisq", "pchisq", "qchisq")]
res.CPTAC.Hurdle2[res.CPTAC.Hurdle2$protein == "P01008ups;CON__P41361",][,c("contrast", "gene.name", "protein.name", "logFC", "logOR", "chisq", "pchisq", "qchisq")] <- res.CPTAC.Hurdle[res.CPTAC.Hurdle$protein == "P01008ups",][,c("contrast", "gene.name", "protein.name", "logFC", "logOR", "chisq", "pchisq", "qchisq")]
res.CPTAC.Hurdle2[res.CPTAC.Hurdle2$protein == "P68871ups;CON__P02070;CON__Q3SX09",][,c("contrast", "gene.name", "protein.name", "logFC", "logOR", "chisq", "pchisq", "qchisq")] <- res.CPTAC.Hurdle[res.CPTAC.Hurdle$protein == "P68871ups",][,c("contrast", "gene.name", "protein.name", "logFC", "logOR", "chisq", "pchisq", "qchisq")]
res.CPTAC.Hurdle2[res.CPTAC.Hurdle2$protein == "P62988ups",][,c("contrast", "gene.name", "protein.name", "logFC", "logOR", "chisq", "pchisq", "qchisq")] <- res.CPTAC.Hurdle[res.CPTAC.Hurdle$protein == "P05759;P0CG63;P62988ups",][,c("contrast", "gene.name", "protein.name", "logFC", "logOR", "chisq", "pchisq", "qchisq")]
res.CPTAC.Hurdle2 <- res.CPTAC.Hurdle2 %>% arrange(pchisq)
# MSqRob without preprocessing #
res.CPTAC.full2 <- res.CPTAC.Base2 %>% left_join(res.CPTAC.full)
## Joining, by = c("protein", "contrast", "UPS")
## Warning: Column `protein` joining factor and character vector, coercing
## into character vector
res.CPTAC.full2[res.CPTAC.full2$protein == "P99999ups;CON__P62894",][,c("contrast", "gene.name", "protein.name", "logFC", "se", "t", "df", "pvalue", "qvalue")] <- res.CPTAC.full[res.CPTAC.full$protein == "P99999ups",][,c("contrast", "gene.name", "protein.name", "logFC", "se", "t", "df", "pvalue", "qvalue")]
res.CPTAC.full2[res.CPTAC.full2$protein == "P01008ups;CON__P41361",][,c("contrast", "gene.name", "protein.name", "logFC", "se", "t", "df", "pvalue", "qvalue")] <- res.CPTAC.full[res.CPTAC.full$protein == "P01008ups",][,c("contrast", "gene.name", "protein.name", "logFC", "se", "t", "df", "pvalue", "qvalue")]
res.CPTAC.full2[res.CPTAC.full2$protein == "P68871ups;CON__P02070;CON__Q3SX09",][,c("contrast", "gene.name", "protein.name", "logFC", "se", "t", "df", "pvalue", "qvalue")] <- res.CPTAC.full[res.CPTAC.full$protein == "P68871ups",][,c("contrast", "gene.name", "protein.name", "logFC", "se", "t", "df", "pvalue", "qvalue")]
res.CPTAC.full2[res.CPTAC.full2$protein == "P62988ups",][,c("contrast", "gene.name", "protein.name", "logFC", "se", "t", "df", "pvalue", "qvalue")] <- res.CPTAC.full[res.CPTAC.full$protein == "P05759;P0CG63;P62988ups",][,c("contrast", "gene.name", "protein.name", "logFC", "se", "t", "df", "pvalue", "qvalue")]
res.CPTAC.full2 <- res.CPTAC.full2 %>% arrange(pvalue)
# Quasibinomial model #
res.CPTAC.co.full2 <- res.CPTAC.Base2 %>% left_join(res.CPTAC.co.full)
## Joining, by = c("protein", "contrast", "UPS")
## Warning: Column `protein` joining factors with different levels, coercing
## to character vector
res.CPTAC.co.full2[res.CPTAC.co.full2$protein == "P99999ups;CON__P62894",][,c("contrast", "gene.name", "protein.name", "logOR", "se", "t", "df", "pvalue", "qvalue")] <- res.CPTAC.co.full[res.CPTAC.co.full$protein == "P99999ups",][,c("contrast", "gene.name", "protein.name", "logOR", "se", "t", "df", "pvalue", "qvalue")]
res.CPTAC.co.full2[res.CPTAC.co.full2$protein == "P01008ups;CON__P41361",][,c("contrast", "gene.name", "protein.name", "logOR", "se", "t", "df", "pvalue", "qvalue")] <- res.CPTAC.co.full[res.CPTAC.co.full$protein == "P01008ups",][,c("contrast", "gene.name", "protein.name", "logOR", "se", "t", "df", "pvalue", "qvalue")]
res.CPTAC.co.full2[res.CPTAC.co.full2$protein == "P68871ups;CON__P02070;CON__Q3SX09",][,c("contrast", "gene.name", "protein.name", "logOR", "se", "t", "df", "pvalue", "qvalue")] <- res.CPTAC.co.full[res.CPTAC.co.full$protein == "P68871ups",][,c("contrast", "gene.name", "protein.name", "logOR", "se", "t", "df", "pvalue", "qvalue")]
res.CPTAC.co.full2[res.CPTAC.co.full2$protein == "P62988ups",][,c("contrast", "gene.name", "protein.name", "logOR", "se", "t", "df", "pvalue", "qvalue")] <- res.CPTAC.co.full[res.CPTAC.co.full$protein == "P05759;P0CG63;P62988ups",][,c("contrast", "gene.name", "protein.name", "logOR", "se", "t", "df", "pvalue", "qvalue")]
res.CPTAC.co.full2 <- res.CPTAC.co.full2 %>% arrange(pvalue)
# MSqRob with kNN imputation #
res.CPTAC.KNN1.full2 <- res.CPTAC.Base2 %>% left_join(res.CPTAC.KNN1.full)
## Joining, by = c("protein", "contrast", "UPS")
## Warning: Column `protein` joining factor and character vector, coercing
## into character vector
res.CPTAC.KNN1.full2[res.CPTAC.KNN1.full2$protein == "P99999ups;CON__P62894",][,c("contrast", "gene.name", "protein.name", "logFC", "se", "t", "df", "pvalue", "qvalue")] <- res.CPTAC.KNN1.full[res.CPTAC.KNN1.full$protein == "P99999ups",][,c("contrast", "gene.name", "protein.name", "logFC", "se", "t", "df", "pvalue", "qvalue")]
res.CPTAC.KNN1.full2[res.CPTAC.KNN1.full2$protein == "P01008ups;CON__P41361",][,c("contrast", "gene.name", "protein.name", "logFC", "se", "t", "df", "pvalue", "qvalue")] <- res.CPTAC.KNN1.full[res.CPTAC.KNN1.full$protein == "P01008ups",][,c("contrast", "gene.name", "protein.name", "logFC", "se", "t", "df", "pvalue", "qvalue")]
res.CPTAC.KNN1.full2[res.CPTAC.KNN1.full2$protein == "P68871ups;CON__P02070;CON__Q3SX09",][,c("contrast", "gene.name", "protein.name", "logFC", "se", "t", "df", "pvalue", "qvalue")] <- res.CPTAC.KNN1.full[res.CPTAC.KNN1.full$protein == "P68871ups",][,c("contrast", "gene.name", "protein.name", "logFC", "se", "t", "df", "pvalue", "qvalue")]
res.CPTAC.KNN1.full2[res.CPTAC.KNN1.full2$protein == "P62988ups",][,c("contrast", "gene.name", "protein.name", "logFC", "se", "t", "df", "pvalue", "qvalue")] <- res.CPTAC.KNN1.full[res.CPTAC.KNN1.full$protein == "P05759;P0CG63;P62988ups",][,c("contrast", "gene.name", "protein.name", "logFC", "se", "t", "df", "pvalue", "qvalue")]
res.CPTAC.KNN1.full2 <- res.CPTAC.KNN1.full2 %>% arrange(pvalue)
# MSqRob with Perseus imputation #
res.CPTAC.PI.full2 <- res.CPTAC.Base2 %>% left_join(res.CPTAC.PI.full)
## Joining, by = c("protein", "contrast", "UPS")
## Warning: Column `protein` joining factor and character vector, coercing
## into character vector
res.CPTAC.PI.full2[res.CPTAC.PI.full2$protein == "P99999ups;CON__P62894",][,c("contrast", "gene.name", "protein.name", "logFC", "se", "t", "df", "pvalue", "qvalue")] <- res.CPTAC.PI.full[res.CPTAC.PI.full$protein == "P99999ups",][,c("contrast", "gene.name", "protein.name", "logFC", "se", "t", "df", "pvalue", "qvalue")]
res.CPTAC.PI.full2[res.CPTAC.PI.full2$protein == "P01008ups;CON__P41361",][,c("contrast", "gene.name", "protein.name", "logFC", "se", "t", "df", "pvalue", "qvalue")] <- res.CPTAC.PI.full[res.CPTAC.PI.full$protein == "P01008ups",][,c("contrast", "gene.name", "protein.name", "logFC", "se", "t", "df", "pvalue", "qvalue")]
res.CPTAC.PI.full2[res.CPTAC.PI.full2$protein == "P68871ups;CON__P02070;CON__Q3SX09",][,c("contrast", "gene.name", "protein.name", "logFC", "se", "t", "df", "pvalue", "qvalue")] <- res.CPTAC.PI.full[res.CPTAC.PI.full$protein == "P68871ups",][,c("contrast", "gene.name", "protein.name", "logFC", "se", "t", "df", "pvalue", "qvalue")]
res.CPTAC.PI.full2[res.CPTAC.PI.full2$protein == "P62988ups",][,c("contrast", "gene.name", "protein.name", "logFC", "se", "t", "df", "pvalue", "qvalue")] <- res.CPTAC.PI.full[res.CPTAC.PI.full$protein == "P05759;P0CG63;P62988ups",][,c("contrast", "gene.name", "protein.name", "logFC", "se", "t", "df", "pvalue", "qvalue")]
res.CPTAC.PI.full2 <- res.CPTAC.PI.full2 %>% arrange(pvalue)
# ProPCA with limma #
res.CPTAC.ProPCA.full
## # A tibble: 4,143 x 11
## protein UPS gene.name protein.name contrast logFC AveExpr t
## <chr> <lgl> <fct> <fct> <chr> <dbl> <dbl> <dbl>
## 1 Q06830ups TRUE "" "" condition6C… 0.552 1.91 32.9
## 2 Q06830ups TRUE "" "" condition6C… 0.313 1.91 18.7
## 3 Q06830ups TRUE "" "" condition6B… 0.238 1.91 14.2
## 4 P12081ups TRUE "" "" condition6C… 0.822 1.52 9.85
## 5 P10636-8… TRUE "" "" condition6C… 1.00 2.12 9.45
## 6 P00915ups TRUE "" "" condition6C… 0.336 0.633 7.45
## 7 P12081ups TRUE "" "" condition6B… 0.611 1.52 7.32
## 8 P02144ups TRUE "" "" condition6C… 0.159 0.877 7.00
## 9 P10145ups TRUE "" "" condition6C… 0.145 0.876 6.89
## 10 P55957ups TRUE "" "" condition6C… 0.593 0.688 6.65
## # ... with 4,133 more rows, and 3 more variables: P.Value <dbl>,
## # adj.P.Val <dbl>, B <dbl>
res.CPTAC.ProPCA.full2 <- res.CPTAC.Base2 %>% left_join(res.CPTAC.ProPCA.full)
## Joining, by = c("protein", "contrast", "UPS")
## Warning: Column `protein` joining factor and character vector, coercing
## into character vector
res.CPTAC.ProPCA.full2[res.CPTAC.ProPCA.full2$protein == "P99999ups;CON__P62894",][,c("contrast", "gene.name", "protein.name", "logFC", "AveExpr", "P.Value", "adj.P.Val", "B")] <- res.CPTAC.ProPCA.full[res.CPTAC.ProPCA.full$protein == "P99999ups",][,c("contrast", "gene.name", "protein.name", "logFC", "AveExpr", "P.Value", "adj.P.Val", "B")]
res.CPTAC.ProPCA.full2[res.CPTAC.ProPCA.full2$protein == "P01008ups;CON__P41361",][,c("contrast", "gene.name", "protein.name", "logFC", "AveExpr", "P.Value", "adj.P.Val", "B")] <- res.CPTAC.ProPCA.full[res.CPTAC.ProPCA.full$protein == "P01008ups",][,c("contrast", "gene.name", "protein.name", "logFC", "AveExpr", "P.Value", "adj.P.Val", "B")]
res.CPTAC.ProPCA.full2[res.CPTAC.ProPCA.full2$protein == "P68871ups;CON__P02070;CON__Q3SX09",][,c("contrast", "gene.name", "protein.name", "logFC", "AveExpr", "P.Value", "adj.P.Val", "B")] <- res.CPTAC.ProPCA.full[res.CPTAC.ProPCA.full$protein == "P68871ups",][,c("contrast", "gene.name", "protein.name", "logFC", "AveExpr", "P.Value", "adj.P.Val", "B")]
res.CPTAC.ProPCA.full2[res.CPTAC.ProPCA.full2$protein == "P62988ups",][,c("contrast", "gene.name", "protein.name", "logFC", "AveExpr", "P.Value", "adj.P.Val", "B")] <- res.CPTAC.ProPCA.full[res.CPTAC.ProPCA.full$protein == "P05759;P0CG63;P62988ups",][,c("contrast", "gene.name", "protein.name", "logFC", "AveExpr", "P.Value", "adj.P.Val", "B")]
res.CPTAC.ProPCA.full2 <- res.CPTAC.ProPCA.full2 %>% arrange(P.Value)
### FDR-FTP plots with MSstats preprocessing as a basis (Supplementary Fig. 6) ###
res.list <- list(res.CPTAC.full2,
res.CPTAC.co.full2,
res.CPTAC.Hurdle2,
res.CPTAC.KNN1.full2,
res.CPTAC.PI.full2,
CPTAC.Perseus.full2,
CPTAC.Perseus.imp.full2,
res.CPTAC.MSstats.full2,
res.CPTAC.MSstats.full2[!is.infinite(res.CPTAC.MSstats.full2$log2FC),],
res.CPTAC.ProPCA.full2
)
contrast.vec <- c("condition6B-condition6A",
"condition6C-condition6B",
"condition6C-condition6A")
names(contrast.vec) <- c("Condition 6B vs. 6A",
"Condition 6C vs. 6B",
"Condition 6C vs. 6A")
colors <- c("#FF681E", "#E15E9E", "#5A2A82", "blue", "green", "#1B2944", "#42B7BA", "yellow3", "peru", "red2") # darkblue: "#1B2944" gray: "#50FF00"
sort.list <- list(c("qvalue", "pvalue", "t", "logFC"),
c("qvalue", "pvalue", "t", "logOR"),
c("qchisq", "pchisq", "chisq", NA),
c("qvalue", "pvalue", "t", "logFC"),
c("qvalue", "pvalue", "t", "logFC"),
c("q.value", "p.value", "Test.statistic", "Difference"),
c("q.value", "p.value", "Test.statistic", "Difference"),
c("adj.pvalue", "pvalue", "Tvalue", "log2FC"),
c("adj.pvalue", "pvalue", "Tvalue", "log2FC"),
c("adj.P.Val", "P.Value", "t", "logFC"))
PlotFDPTPR(res.list, contrast.vec, colors, sort.list, TPcol = c("UPS"), plotSVG = FALSE) #TRUE
# Make the plots
# BA, CB, CA
upratio <- c(1.565597, 1.571906, 3.137504)
# 1. Top: condition 6B vs. 6A
TP.CvsB <- res.CPTAC.Hurdle %>% filter(contrast == "condition6B-condition6A" & UPS)
FP.CvsB <- res.CPTAC.Hurdle %>% filter(contrast == "condition6B-condition6A" & !UPS)
scatterHist(x = ((FP.CvsB$logOR) %>% unlist),
y = ((FP.CvsB$logFC) %>% unlist),
x2 = ((TP.CvsB$logOR) %>% unlist),
y2 = ((TP.CvsB$logFC) %>% unlist),
main = "Condition B vs. A", plotSVG = FALSE, filename = paste0(wd,"/scatterhistBA.svg")) #, plotSVG = TRUE, filename = paste0(wd,"/scatterhist.svg")
# 2. Middle: condition 6C vs. 6A
TP.CvsA <- res.CPTAC.Hurdle %>% filter(contrast == "condition6C-condition6A" & UPS)
FP.CvsA <- res.CPTAC.Hurdle %>% filter(contrast == "condition6C-condition6A" & !UPS)
scatterHist(x = ((FP.CvsA$logOR) %>% unlist),
y = ((FP.CvsA$logFC) %>% unlist),
x2 = ((TP.CvsA$logOR) %>% unlist),
y2 = ((TP.CvsA$logFC) %>% unlist),
main = "Condition C vs. A", plotSVG = FALSE, filename = paste0(wd,"/scatterhistCA.svg")) #, plotSVG = TRUE, filename = paste0(wd,"/scatterhist.svg")
# 3. Bottom: condition 6C vs. 6B
TP.CvsB <- res.CPTAC.Hurdle %>% filter(contrast == "condition6C-condition6B" & UPS)
FP.CvsB <- res.CPTAC.Hurdle %>% filter(contrast == "condition6C-condition6B" & !UPS)
scatterHist(x = ((FP.CvsB$logOR) %>% unlist),
y = ((FP.CvsB$logFC) %>% unlist),
x2 = ((TP.CvsB$logOR) %>% unlist),
y2 = ((TP.CvsB$logFC) %>% unlist),
main = "Condition C vs. B", plotSVG = FALSE, filename = paste0(wd,"/scatterhistCB.svg")) #, plotSVG = TRUE, filename = paste0(wd,"/scatterhist.svg")
Aditionally, we here provide the plots for the underlying (preprocessed) peptide-level data for the 1500 most significant proteins for each method in the HEART dataset, grouped per method.
### Overlap all (Supplementary Fig. 8)
plot_proteins(gene.names = "MYL7", title = "MYL7", msnset = peptides.HEART2, pdf = FALSE)
plot_proteins(gene.names = "NPPA", title = "NPPA", msnset = peptides.HEART2, pdf = FALSE)
plot_proteins(gene.names = "PAM", title = "PAM", msnset = peptides.HEART2, pdf = FALSE)
plot_proteins(gene.names = "MYBPHL", title = "MYBPHL", msnset = peptides.HEART2, pdf = FALSE)
# plot_proteinsSVG(gene.names = c("MYL7", "NPPA", "PAM", "MYBPHL"), title = c("overlap_all.svg"), msnset = peptides.HEART2)
# plot_proteins(gene.names = genes.MSqRob.AvsV.1500[(genes.MSqRob.AvsV.1500 %in% genes.Perseus.AvsV.1500) & (genes.MSqRob.AvsV.1500 %in% genes.Hurdle.AvsV.1500)], title = "overlap_all.pdf", msnset = peptides.HEART2, pdf = TRUE)
### Overlap Hurdle and Perseus (Supplementary Fig. 9)
plot_proteins(gene.names = "CHGB", title = "CHGB", msnset = peptides.HEART2, pdf = FALSE)
plot_proteins(gene.names = "PRKCD", title = "PRKCD", msnset = peptides.HEART2, pdf = FALSE)
plot_proteins(gene.names = "PRR33", title = "PRR33", msnset = peptides.HEART2, pdf = FALSE)
plot_proteins(gene.names = "NTN1", title = "NTN1", msnset = peptides.HEART2, pdf = FALSE)
# plot_proteinsSVG(gene.names = c("CHGB", "PRKCD", "PRR33", "NTN1"), title = c("overlap_hurdle_Perseus.svg"), msnset = peptides.HEART2)
# plot_proteins(gene.names = genes.Hurdle.AvsV.1500[(genes.Hurdle.AvsV.1500 %in% genes.Perseus.AvsV.1500) & !(genes.Hurdle.AvsV.1500 %in% genes.MSqRob.AvsV.1500)], title = "overlap_hurdle_Perseus.pdf", msnset = peptides.HEART2, pdf = TRUE)
### Hurdle alone (Supplementary Fig. 10)
plot_proteins(gene.names = "PDS5A", title = "PDS5A", msnset = peptides.HEART2, pdf = FALSE)
plot_proteins(gene.names = "CNOT1", title = "CNOT1", msnset = peptides.HEART2, pdf = FALSE)
plot_proteins(gene.names = "PLVAP", title = "PLVAP", msnset = peptides.HEART2, pdf = FALSE)
plot_proteins(gene.names = "RELA", title = "RELA", msnset = peptides.HEART2, pdf = FALSE)
# plot_proteinsSVG(gene.names = c("PDS5A", "CNOT1", "PLVAP", "RELA"), title = c("only_hurdle.svg"), msnset = peptides.HEART2)
# plot_proteins(gene.names = genes.Hurdle.AvsV.1500[!(genes.Hurdle.AvsV.1500 %in% genes.MSqRob.AvsV.1500) & !(genes.Hurdle.AvsV.1500 %in% genes.Perseus.AvsV.1500)], title = "only_hurdle.pdf", msnset = peptides.HEART2, pdf = TRUE)
### Perseus alone (Supplementary Fig. 11)
plot_proteins(gene.names = "ASNSD1", title = "ASNSD1", msnset = peptides.HEART2, pdf = FALSE)
plot_proteins(gene.names = "LMO7", title = "LMO7", msnset = peptides.HEART2, pdf = FALSE)
plot_proteins(gene.names = "MTCL1", title = "MTCL1", msnset = peptides.HEART2, pdf = FALSE)
plot_proteins(gene.names = "BLOC1S5;BLOC1S5-TXNDC5", title = "BLOC1S5;BLOC1S5-TXNDC5", msnset = peptides.HEART2, pdf = FALSE)
# plot_proteinsSVG(gene.names = c("ASNSD1", "LMO7", "MTCL1", "BLOC1S5;BLOC1S5-TXNDC5"), title = c("only_Perseus.svg"), msnset = peptides.HEART2)
# plot_proteins(gene.names = genes.Perseus.AvsV.1500[!(genes.Perseus.AvsV.1500 %in% genes.MSqRob.AvsV.1500) & !(genes.Perseus.AvsV.1500 %in% genes.Hurdle.AvsV.1500)], title = "only_Perseus.pdf", msnset = peptides.HEART2, pdf = TRUE)
### Overlap MSqRob and Perseus (Supplementary Fig. 12)
plot_proteins(gene.names = "ACO1", title = "ACO1", msnset = peptides.HEART2, pdf = FALSE)
plot_proteins(gene.names = "HNRNPK", title = "HNRNPK", msnset = peptides.HEART2, pdf = FALSE)
plot_proteins(gene.names = "SLC25A20", title = "SLC25A20", msnset = peptides.HEART2, pdf = FALSE)
plot_proteins(gene.names = "TPM4", title = "TPM4", msnset = peptides.HEART2, pdf = FALSE)
# plot_proteinsSVG(gene.names = c("ACO1", "HNRNPK", "SLC25A20", "TPM4"), title = c("overlap_MSqRob_Perseus.svg"), msnset = peptides.HEART2)
# plot_proteins(gene.names = genes.MSqRob.AvsV.1500[(genes.MSqRob.AvsV.1500 %in% genes.Perseus.AvsV.1500) & !(genes.MSqRob.AvsV.1500 %in% genes.Hurdle.AvsV.1500)], title = "overlap_MSqRob_Perseus.pdf", msnset = peptides.HEART2, pdf = TRUE)
### Venn diagrams atrial vs. ventricular region ###
### Checks: ###
all(Perseus.AvsV.ov$`Gene names` %in% hurdle.AvsV.ov$gene.name)
## [1] TRUE
all(Perseus.AvsV.ov$`Gene names` %in% MSqRob.AvsV.ov$gene.name)
## [1] TRUE
all(hurdle.AvsV.ov$gene.name %in% Perseus.AvsV.ov$`Gene names`)
## [1] TRUE
all(MSqRob.AvsV.ov$gene.name %in% Perseus.AvsV.ov$`Gene names`)
## [1] TRUE
dim(MSqRob.AvsV.ov)
## [1] 7822 9
dim(hurdle.AvsV.ov)
## [1] 7822 8
length(unique(Perseus.AvsV.ov$`Gene names`))
## [1] 7822
################
### Top: the first 500 significant genes ###
# A. Select top 500 DA gene identifiers for the Perseus analysis of ventricles vs. atria
genes.Perseus.AvsV.500 <- (Perseus.AvsV.ov %>% pull(`Gene names`) %>% unique)[1:500]
# B. Select top 500 DA gene identifiers for the Hurdle analysis of atria vs. ventricles
genes.Hurdle.AvsV.500 <- hurdle.AvsV.ov[1:500,]$gene.name
# C. Select top 500 DA gene identifiers for the MSqRob analysis of atria vs. ventricles
genes.MSqRob.AvsV.500 <- MSqRob.AvsV.ov[1:500,]$gene.name
# MSqRob alone
sum(!(genes.MSqRob.AvsV.500 %in% genes.Hurdle.AvsV.500) & !(genes.MSqRob.AvsV.500 %in% genes.Perseus.AvsV.500))
## [1] 110
# 110
# Hurdle alone
sum(!(genes.Hurdle.AvsV.500 %in% genes.MSqRob.AvsV.500) & !(genes.Hurdle.AvsV.500 %in% genes.Perseus.AvsV.500))
## [1] 99
# 99
# Perseus alone
sum(!(genes.Perseus.AvsV.500 %in% genes.MSqRob.AvsV.500) & !(genes.Perseus.AvsV.500 %in% genes.Hurdle.AvsV.500))
## [1] 214
# 214
# Overlap MSqRob and Hurdle
sum((genes.MSqRob.AvsV.500 %in% genes.Hurdle.AvsV.500) & !(genes.MSqRob.AvsV.500 %in% genes.Perseus.AvsV.500))
## [1] 144
# 144
# Overlap MSqRob and Perseus
sum((genes.MSqRob.AvsV.500 %in% genes.Perseus.AvsV.500) & !(genes.MSqRob.AvsV.500 %in% genes.Hurdle.AvsV.500))
## [1] 29
# 43
# Overlap Hurdle and Perseus
sum((genes.Hurdle.AvsV.500 %in% genes.Perseus.AvsV.500) & !(genes.Hurdle.AvsV.500 %in% genes.MSqRob.AvsV.500))
## [1] 40
# 40
# Overlap everything
sum((genes.MSqRob.AvsV.500 %in% genes.Perseus.AvsV.500) & (genes.MSqRob.AvsV.500 %in% genes.Hurdle.AvsV.500))
## [1] 217
# 217
# Control:
length(genes.MSqRob.AvsV.500)
## [1] 500
110+217+144+29
## [1] 500
length(genes.Hurdle.AvsV.500)
## [1] 500
99+144+217+40
## [1] 500
length(genes.Perseus.AvsV.500)
## [1] 500
214+40+29+217
## [1] 500
### Bottom: the first 1000 significant genes ###
# A. Select top 1000 DA gene identifiers for the Perseus analysis of ventricles vs. atria
genes.Perseus.AvsV.1000 <- (Perseus.AvsV.ov %>% pull(`Gene names`) %>% unique)[1:1000]
# B. Select top 1000 DA gene identifiers for the Hurdle analysis of atria vs. ventricles
genes.Hurdle.AvsV.1000 <- hurdle.AvsV.ov[1:1000,]$gene.name
# C. Select top 1000 DA gene identifiers for the MSqRob analysis of atria vs. ventricles
genes.MSqRob.AvsV.1000 <- MSqRob.AvsV.ov[1:1000,]$gene.name
# MSqRob alone
sum(!(genes.MSqRob.AvsV.1000 %in% genes.Hurdle.AvsV.1000) & !(genes.MSqRob.AvsV.1000 %in% genes.Perseus.AvsV.1000))
## [1] 210
# 210
# Hurdle alone
sum(!(genes.Hurdle.AvsV.1000 %in% genes.MSqRob.AvsV.1000) & !(genes.Hurdle.AvsV.1000 %in% genes.Perseus.AvsV.1000))
## [1] 158
# 158
# Perseus alone
sum(!(genes.Perseus.AvsV.1000 %in% genes.MSqRob.AvsV.1000) & !(genes.Perseus.AvsV.1000 %in% genes.Hurdle.AvsV.1000))
## [1] 416
# 416
# Overlap MSqRob and Hurdle
sum((genes.MSqRob.AvsV.1000 %in% genes.Hurdle.AvsV.1000) & !(genes.MSqRob.AvsV.1000 %in% genes.Perseus.AvsV.1000))
## [1] 325
# 325
# Overlap MSqRob and Perseus
sum((genes.MSqRob.AvsV.1000 %in% genes.Perseus.AvsV.1000) & !(genes.MSqRob.AvsV.1000 %in% genes.Hurdle.AvsV.1000))
## [1] 67
# 67
# Overlap Hurdle and Perseus
sum((genes.Hurdle.AvsV.1000 %in% genes.Perseus.AvsV.1000) & !(genes.Hurdle.AvsV.1000 %in% genes.MSqRob.AvsV.1000))
## [1] 119
# 119
# Overlap everything
sum((genes.MSqRob.AvsV.1000 %in% genes.Perseus.AvsV.1000) & (genes.MSqRob.AvsV.1000 %in% genes.Hurdle.AvsV.1000))
## [1] 398
# 398
# Control:
length(genes.MSqRob.AvsV.1000)
## [1] 1000
210+325+398+67
## [1] 1000
length(genes.Hurdle.AvsV.1000)
## [1] 1000
158+325+398+119
## [1] 1000
length(genes.Perseus.AvsV.1000)
## [1] 1000
416+67+119+398
## [1] 1000